x86: optimize this_cpu()
author Keir Fraser <keir.fraser@citrix.com>
Tue, 13 Jul 2010 17:17:28 +0000 (18:17 +0100)
committer Keir Fraser <keir.fraser@citrix.com>
Tue, 13 Jul 2010 17:17:28 +0000 (18:17 +0100)
Besides the .text space savings of over 2.5k on x86-64 (1.5k for
x86-32) this removes a load (plus a lea on x86-64) from various
frequently executed code paths, and finally provides a reason (other
than legibility) to prefer this_cpu() over per_cpu() in all places
where smp_processor_id() isn't being called anyway.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
xen/include/asm-x86/current.h
xen/include/asm-x86/percpu.h

index 0a423fba67f1186765d59f452ba510989f04397b..bed04f1c249597f832e4d168c63765e9f3690e72 100644 (file)
@@ -16,8 +16,12 @@ struct vcpu;
 
 struct cpu_info {
     struct cpu_user_regs guest_cpu_user_regs;
-    unsigned int         processor_id;
-    struct vcpu         *current_vcpu;
+    unsigned int processor_id;
+    struct vcpu *current_vcpu;
+    unsigned long per_cpu_offset;
+#ifdef __x86_64__ /* get_stack_bottom() must be 16-byte aligned */
+    unsigned long __pad_for_stack_bottom;
+#endif
 };
 
 static inline struct cpu_info *get_cpu_info(void)
@@ -35,7 +39,10 @@ static inline struct cpu_info *get_cpu_info(void)
 #define current               (get_current())
 
 #define get_processor_id()    (get_cpu_info()->processor_id)
-#define set_processor_id(id)  (get_cpu_info()->processor_id = (id))
+#define set_processor_id(id)  do {                                      \
+    struct cpu_info *ci__ = get_cpu_info();                             \
+    ci__->per_cpu_offset = __per_cpu_offset[ci__->processor_id = (id)]; \
+} while (0)
 
 #define guest_cpu_user_regs() (&get_cpu_info()->guest_cpu_user_regs)
 
index fb1d4140ecbb55e93ba5ad41aa5d05cbf3c69155..e6faa8ce0772a5b2ad1800b705eb2f810a2b6d30 100644 (file)
@@ -16,7 +16,7 @@ void percpu_init_areas(void);
 #define per_cpu(var, cpu)  \
     (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
 #define __get_cpu_var(var) \
-    (per_cpu(var, smp_processor_id()))
+    (*RELOC_HIDE(&per_cpu__##var, get_cpu_info()->per_cpu_offset))
 
 #define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name